################### PROJECT- JCR REPLICATION - data_processing_04 ##############################################
# This R file combines the raw datasets, and creates the year and municipality-level dataset for shifts in territorial control.
# Datasets generated from this codes include all datasets in the folder named as "dataset_mun_*_*.csv".
# They include all variables needed for analysis in Appendix F.
# NOTE: To run this data processing file, 
# please run "data processing_01.R" first and get the yearly level dataset on shifts in territorial control.
# Last updated: 16th, May 2025, by Wangyin Zhao
##############################################################################################################

##############################################################################################################
# Outline of Script
# A. Preparation
#    - Set working directory
#    - Load required packages
#
# B. Import Raw Datasets
#
# C. Data processing
#    1. Creating the municipality-level occurrence of rainfall-related natural disasters
#    2. Creating the municipality-level occurrence of rainfall-related natural disasters
#    3. Coding the territorial setting
#    4. Code the outcome
#
# D. Data merge
##############################################################################################################

#### A. Preparation ####
# Set working directory to Replication_Files folder
setwd("./Replication_Files")

# Import relevant packages
packages <- c("sf", "tidyverse", "exactextractr", "spdep", "zoo")
lapply(packages, library, character.only = TRUE)
########

#### B. Import raw datasets: ####
# The spatial data on the barangays from the Philippine Standard Geographic Code (PSGC) 
barangay <- st_read("./data/raw/Barangays/Barangays.shp")
# The spatial data on the barangays from the Philippine Standard Geographic Code (PSGC) 
municipalities <- st_read("./data/raw/Municipalities/Municipalities.shp")
# Barangay-level territorial control data:
control <- read_csv("./data/raw/CNN_Brgy_Affectation_2011-2014.csv")

# The data on rainfall-related natural disasters:
# the precipitation data is from the IMERG: Integrated Multi-satellite Retrievals for GPM. 
# .nc or tif. files for the precipitation for each month and grid can be downloaded from https://gpm.nasa.gov/data/imerg (better for batch downloading) 
# https://giovanni.gsfc.nasa.gov/giovanni/#service=TmAvMp&starttime=&endtime=&dataKeyword=IMERG%20Final (for a more access version)


########

#### C.Data processing ####

#### 1. Creating the municipality-level occurrence of rainfall-related natural disasters ####
#### The first way - recalculate the z-value at the municipality level ####
# As the sizes are too big, I have not included the raw data for all precipitation tif. files for each grid at each month
# the precipitation data is from the IMERG: Integrated Multi-satellite Retrievals for GPM. 
# .nc or tif. files for the precipitation for each month and grid can be downloaded from https://gpm.nasa.gov/data/imerg (better for batch downloading)
# https://giovanni.gsfc.nasa.gov/giovanni/#service=TmAvMp&starttime=&endtime=&dataKeyword=IMERG%20Final (for a more access version)

# I include the data on precipitation mean at the municipality and month level after processing and the codes for the processing all spatial files (.tif) 
# Processing codes
# map_mun <- municipalities %>% 
#  st_make_valid()
# rainfallfilelist <- list.files("data/phl/", full.names = T, pattern = "GIOVANNI-*") # List rainfall raster files in folder

# rainfallfilesdf <- data.frame(filename=rainfallfilelist, yearmonth=stringr::str_split(basename(rainfallfilelist), pattern = "_") %>% lapply(., function(x) x[4]) %>% unlist()) %>%
#  mutate(yearmonth = str_sub(yearmonth,start=15, end=20))

# fulldf <- tibble()
# for(i in rainfallfilesdf$yearmonth){
  
#  tt <- Sys.time()
#  print(i)
  
#  rainfallfilename <- rainfallfilesdf$filename[rainfallfilesdf$yearmonth==i] 
  
#  rainfallfile <- terra::rast(rainfallfilename)
  
#  rainfallrast <- terra::crop(rainfallfile, map_mun) 
#  rainfallrast <- terra::mask(rainfallrast, map_mun)
  
#  tmpres <- tibble(rainfallfile=rainfallfilename,
#                   ADM3_PCODE=map_mun$ADM3_PCODE,
#                   yearmonth=i,
#                   rainfallmean=exact_extract(rainfallrast, map_mun, "mean"))
  
#  fulldf <- bind_rows(fulldf,tmpres)
  
#  print(Sys.time()-tt)}

# fulldf <- fulldf %>% rename(ADM3_PCODE = ADM4_PCODE)

# First, upload the data on the precipitation at the municipality level 
fulldf_mun <- read_csv("data/analysis/rainfall_zonal stat for phl_mun.csv") %>% dplyr::select(ADM3_PCODE, yearmonth, rainfallmean) %>%
  mutate(yearmonth = as.numeric(yearmonth),
         year = as.numeric(str_sub(yearmonth,start=1, end=4)),
         month = as.integer(str_sub(yearmonth, start=5, end=6)))
  
output_mun <- fulldf_mun
# Second, calculate the mean precipitation for each municipality at the specific year-month 
output_mun_1 <- output_mun %>% mutate(rainfallmean = as.numeric(rainfallmean)) %>%
  group_by(ADM3_PCODE, year, month) %>%
  mutate(mean_bar = mean(rainfallmean, na.rm = TRUE)) %>% drop_na()


# Third, calculate the moving average of the mean precipitation for each climatic zone at the specific year-month
output_mun_1 <- output_mun_1 %>% select(ADM3_PCODE, year, month, mean_bar) %>% mutate(month = as.numeric(month)) %>%
  arrange(ADM3_PCODE, year, month) %>%   # Sort by location, year, and month
  group_by(ADM3_PCODE, month)%>% 
  mutate(roll = zoo::rollmean(mean_bar, k = 11, fill = NA)) %>%
  mutate(roll_sd = rollapply(mean_bar, 11, FUN = sd, fill = NA))

# Fourth, crop the specific range for the year and create a variable called code - the specific climatic zone-year-month
output_mun_f <- output_mun_1 %>% filter(year > 2005 & year < 2016)  

# Fifth, calculate the z-value
output_mun_f$z_value <- (output_mun_f$mean_bar - output_mun_f$roll)/output_mun_f$roll_sd

output_mun_f <- output_mun_f %>% mutate(nd_1.6 = if_else(z_value >= 1.6, 1, 0)) 


nd_year_mun <- output_mun_f %>% group_by(ADM3_PCODE, year) %>%
  summarise(nd_1.6 = if_else(mean(nd_1.6) > 0, 1, 0))

########

#### 2. Creating the municipality-level occurrence of rainfall-related natural disasters ####
#### The second way - aggregate the natural disaster occurrence at the barangay to the municipality level ####
fulldf <- read.csv("./data/analysis/rainfall_zonal stat for phl.csv")  %>%
  mutate(yearmonth = as.numeric(yearmonth)) %>%
  select(ADM4_PCODE, yearmonth, rainfallmean) %>%
  mutate(
    year = as.integer(str_sub(yearmonth, 1, 4)),
    month = as.integer(str_sub(yearmonth, 5, 6))) # some data cleaning


output_f <- fulldf %>%
  group_by(ADM4_PCODE, year, month) %>%
  summarise(rainfallmean = mean(rainfallmean, na.rm = TRUE), .groups = "drop") %>%
  arrange(ADM4_PCODE, year, month) %>%
  group_by(ADM4_PCODE, month) %>%
  mutate(
    roll = rollmean(rainfallmean, k = 11, fill = NA, align = "center"),
    roll_sd = rollapply(rainfallmean, width = 11, FUN = sd, fill = NA, align = "center")
  ) %>%
  ungroup() %>%
  filter(year > 2008, year < 2015) %>%
  mutate(z_value = (rainfallmean - roll) / roll_sd) %>%
  drop_na() # this code creates the z_value for the precipitation mean for each barangay at each month 

nd <- output_f %>% filter(ADM4_PCODE != "PH097332")
nd$ND <- ifelse(nd$z_value >= 1.6, 1, 0)

map_bar <- barangay
map_bar <- map_bar %>% filter(ADM4_PCODE != "PH097332") %>% st_drop_geometry()

nd_map <- nd %>% left_join(map_bar) %>%
  dplyr::select(ADM1_PCODE, ADM2_PCODE, ADM3_PCODE, ADM4_PCODE, year, month, z_value, ND) %>% st_drop_geometry() %>%
  group_by(ADM3_PCODE) %>%
  mutate(village = length(unique(ADM4_PCODE))) %>%
  ungroup() %>%
  group_by(ADM3_PCODE, year) %>%
  summarise(nd = sum(ND), village = first(village)*12) %>%
  mutate(nd = nd/village) # calculate the propertion of barangays affected by natural disasters among all within the municipality


########

#### 3. Coding the territorial setting ####
control <- control %>% dplyr::select(Region_PSGC_Name, Region_PSGC_ID_mod, Province_PSGC_Name, 
                                     Province_PSGC_ID_mod, Muni_PSGC_Name, Muni_PSGC_ID_mod, 
                                     Brgy_PSGC_Name, Brgy_PSGC_ID, year, 
                                     Infl, LessInfl, Threat, aff, Infl2, aff2, Infl3) 

control <- merge(barangay, control, by.x='ADM_ID', by.y='Brgy_PSGC_ID', all.y=TRUE)

control <- control %>% pivot_wider(names_from = "year",
                                   values_from = c("Infl", "LessInfl", "Threat", 
                                                   "aff", "Infl2", "aff2", "Infl3")) # transfer it into barangay level for visualisation purpose and would transfer it back later


control_ts <- st_make_valid(control) %>% 
  dplyr::select(ADM2_PCODE, ADM3_PCODE, ADM4_PCODE, Infl2_2011, Infl2_2012, Infl2_2013, Infl2_2014) %>%
  pivot_longer(cols = starts_with("Infl2"), 
               names_to = "year", 
               values_to = "Influence2") %>%
  mutate(year = str_sub(year,start=-4)) %>% 
  st_drop_geometry() %>% 
  mutate(ID_year = paste(ADM4_PCODE, year, sep = "_"),
         year = as.numeric(year)) %>%
  dplyr::select(ADM4_PCODE, ADM3_PCODE, ADM2_PCODE, year, Influence2) %>% 
  st_drop_geometry() %>%
  group_by(ADM3_PCODE, year) %>%
  summarise(Influence2 = sum(Influence2), village = n()) %>%
  mutate(Inf2_per = Influence2/village)
#####

#### 3.1 Classify Municipal-Level Territorial Settings Based on Rebel Influence (Threshold: 20%) ####
control_ts_0.2 <- control_ts %>% mutate(inf2_mun_0.2 = if_else(Inf2_per >= 0.2, 1, 0))

control_ts_0.2 <- control_ts_0.2 %>% pivot_wider(names_from = "year",
                                           values_from = c("Influence2", "Inf2_per", "inf2_mun_0.2")) %>%
  dplyr::select(ADM3_PCODE, inf2_mun_0.2_2011, inf2_mun_0.2_2012, inf2_mun_0.2_2013, inf2_mun_0.2_2014)


map_mun <- municipalities %>% st_make_valid()

phl_0.2 <- map_mun %>% left_join(control_ts_0.2) %>%
  dplyr::select(ADM3_PCODE, inf2_mun_0.2_2011, inf2_mun_0.2_2012, inf2_mun_0.2_2013, inf2_mun_0.2_2014) #add the geometry back 


phl_0.2_nb <- spdep::poly2nb(phl_0.2, queen = TRUE)

phl_0.2_nblist <- nb2listw(phl_0.2_nb,style = "W", zero.policy = TRUE) 

# code the territorial setting
# 2011
phl_0.2$Territory_inf2_mun_0.2_2011 <- lag.listw(x = phl_0.2_nblist, var = phl_0.2$inf2_mun_0.2_2011, zero.policy = TRUE, NAOK = TRUE) 
phl_0.2$Territory_inf2_mun_0.2_2011[phl_0.2$Territory_inf2_mun_0.2_2011 > 0 & phl_0.2$Territory_inf2_mun_0.2_2011 < 1] <- "Mixed"
phl_0.2$Territory_inf2_mun_0.2_2011[phl_0.2$Territory_inf2_mun_0.2_2011 == 0 & phl_0.2$inf2_mun_0.2_2011 == 0] <- "Homogenous"
phl_0.2$Territory_inf2_mun_0.2_2011[phl_0.2$Territory_inf2_mun_0.2_2011 == 1 & phl_0.2$inf2_mun_0.2_2011 == 1] <- "Homogenous"
phl_0.2$Territory_inf2_mun_0.2_2011[phl_0.2$Territory_inf2_mun_0.2_2011 == 0 & phl_0.2$inf2_mun_0.2_2011 == 1] <- "Enclave"
phl_0.2$Territory_inf2_mun_0.2_2011[phl_0.2$Territory_inf2_mun_0.2_2011 == 1 & phl_0.2$inf2_mun_0.2_2011 == 0] <- "Enclave"
prop.table(table(phl$Territory_inf2_mun_2011))

# 2012
phl_0.2$Territory_inf2_mun_0.2_2012 <- lag.listw(x = phl_0.2_nblist, var = phl_0.2$inf2_mun_0.2_2012, zero.policy = TRUE, NAOK = TRUE) 
phl_0.2$Territory_inf2_mun_0.2_2012[phl_0.2$Territory_inf2_mun_0.2_2012 > 0 & phl_0.2$Territory_inf2_mun_0.2_2012 < 1] <- "Mixed"
phl_0.2$Territory_inf2_mun_0.2_2012[phl_0.2$Territory_inf2_mun_0.2_2012 == 0 & phl_0.2$inf2_mun_0.2_2012 == 0] <- "Homogenous"
phl_0.2$Territory_inf2_mun_0.2_2012[phl_0.2$Territory_inf2_mun_0.2_2012 == 1 & phl_0.2$inf2_mun_0.2_2012 == 1] <- "Homogenous"
phl_0.2$Territory_inf2_mun_0.2_2012[phl_0.2$Territory_inf2_mun_0.2_2012 == 0 & phl_0.2$inf2_mun_0.2_2012 == 1] <- "Enclave"
phl_0.2$Territory_inf2_mun_0.2_2012[phl_0.2$Territory_inf2_mun_0.2_2012 == 1 & phl_0.2$inf2_mun_0.2_2012 == 0] <- "Enclave"
prop.table(table(phl_0.2$Territory_inf2_mun_2012))

# 2013
phl_0.2$Territory_inf2_mun_0.2_2013 <- lag.listw(x = phl_0.2_nblist, var = phl_0.2$inf2_mun_0.2_2013, zero.policy = TRUE, NAOK = TRUE) 
phl_0.2$Territory_inf2_mun_0.2_2013[phl_0.2$Territory_inf2_mun_0.2_2013 > 0 & phl_0.2$Territory_inf2_mun_0.2_2013 < 1] <- "Mixed"
phl_0.2$Territory_inf2_mun_0.2_2013[phl_0.2$Territory_inf2_mun_0.2_2013 == 0 & phl_0.2$inf2_mun_0.2_2013 == 0] <- "Homogenous"
phl_0.2$Territory_inf2_mun_0.2_2013[phl_0.2$Territory_inf2_mun_0.2_2013 == 1 & phl_0.2$inf2_mun_0.2_2013 == 1] <- "Homogenous"
phl_0.2$Territory_inf2_mun_0.2_2013[phl_0.2$Territory_inf2_mun_0.2_2013 == 0 & phl_0.2$inf2_mun_0.2_2013 == 1] <- "Enclave"
phl_0.2$Territory_inf2_mun_0.2_2013[phl_0.2$Territory_inf2_mun_0.2_2013 == 1 & phl_0.2$inf2_mun_0.2_2013 == 0] <- "Enclave"
prop.table(table(phl_0.2$Territory_inf2_mun_2013))

# 2014
phl_0.2$Territory_inf2_mun_0.2_2014 <- lag.listw(x = phl_0.2_nblist, var = phl_0.2$inf2_mun_0.2_2014, zero.policy = TRUE, NAOK = TRUE) 
phl_0.2$Territory_inf2_mun_0.2_2014[phl_0.2$Territory_inf2_mun_0.2_2014 > 0 & phl_0.2$Territory_inf2_mun_0.2_2014 < 1] <- "Mixed"
phl_0.2$Territory_inf2_mun_0.2_2014[phl_0.2$Territory_inf2_mun_0.2_2014 == 0 & phl_0.2$inf2_mun_0.2_2014 == 0] <- "Homogenous"
phl_0.2$Territory_inf2_mun_0.2_2014[phl_0.2$Territory_inf2_mun_0.2_2014 == 1 & phl_0.2$inf2_mun_0.2_2014 == 1] <- "Homogenous"
phl_0.2$Territory_inf2_mun_0.2_2014[phl_0.2$Territory_inf2_mun_0.2_2014 == 0 & phl_0.2$inf2_mun_0.2_2014 == 1] <- "Enclave"
phl_0.2$Territory_inf2_mun_0.2_2014[phl_0.2$Territory_inf2_mun_0.2_2014 == 1 & phl_0.2$inf2_mun_0.2_2014 == 0] <- "Enclave"
prop.table(table(phl$Territory_inf2_mun_2014))

## pivot_longer
phl_mun_0.2 <- phl_0.2 %>% dplyr::select(ADM3_PCODE, Territory_inf2_mun_0.2_2011, Territory_inf2_mun_0.2_2012, Territory_inf2_mun_0.2_2013, Territory_inf2_mun_0.2_2014) %>%
  pivot_longer(cols = starts_with("Territory_inf2"), 
               names_to = "year", 
               values_to = "T_inf2") %>%
  mutate(year = str_sub(year,start=-4))


## drop the geometry
phl_mun_0.2 <- phl_mun_0.2 %>% st_drop_geometry() %>% mutate(ID_year = paste(ADM3_PCODE, year, sep = "_")) %>% 
  mutate(year = as.numeric(year),
         ID_year = paste(ADM3_PCODE, year, sep = "_")) 



phl_mun_0.2$Enclave_inf2_0.2[phl_mun_0.2$T_inf2 == "Enclave"] <- 1
phl_mun_0.2$Enclave_inf2_0.2[phl_mun_0.2$T_inf2 == "Homogenous" | phl_mun_0.2$T_inf2 == "Mixed"] <- 0
phl_mun_0.2$Mixed_inf2_0.2[phl_mun_0.2$T_inf2 == "Mixed"] <- 1
phl_mun_0.2$Mixed_inf2_0.2[phl_mun_0.2$T_inf2 == "Homogenous" | phl_mun_0.2$T_inf2 == "Enclave"] <- 0

# phl_mun_f = territorial setting 
########

#### 3.2 Classify Municipal-Level Territorial Settings Based on Rebel Influence (Threshold: 30%) ####
control_ts_0.3 <- control_ts %>% mutate(inf2_mun_0.3 = if_else(Inf2_per >= 0.3, 1, 0))

control_ts_0.3 <- control_ts_0.3 %>% pivot_wider(names_from = "year",
                                                 values_from = c("Influence2", "Inf2_per", "inf2_mun_0.3")) %>%
  dplyr::select(ADM3_PCODE, inf2_mun_0.3_2011, inf2_mun_0.3_2012, inf2_mun_0.3_2013, inf2_mun_0.3_2014)


map_mun <- municipalities %>% st_make_valid()

phl_0.3 <- map_mun %>% left_join(control_ts_0.3) %>%
  dplyr::select(ADM3_PCODE, inf2_mun_0.3_2011, inf2_mun_0.3_2012, inf2_mun_0.3_2013, inf2_mun_0.3_2014) #add the geometry back 


phl_0.3_nb <- spdep::poly2nb(phl_0.3, queen = TRUE)

phl_0.3_nblist <- nb2listw(phl_0.3_nb,style = "W", zero.policy = TRUE) 

# code the territorial setting
# 2011
phl_0.3$Territory_inf2_mun_0.3_2011 <- lag.listw(x = phl_0.3_nblist, var = phl_0.3$inf2_mun_0.3_2011, zero.policy = TRUE, NAOK = TRUE) 
phl_0.3$Territory_inf2_mun_0.3_2011[phl_0.3$Territory_inf2_mun_0.3_2011 > 0 & phl_0.3$Territory_inf2_mun_0.3_2011 < 1] <- "Mixed"
phl_0.3$Territory_inf2_mun_0.3_2011[phl_0.3$Territory_inf2_mun_0.3_2011 == 0 & phl_0.3$inf2_mun_0.3_2011 == 0] <- "Homogenous"
phl_0.3$Territory_inf2_mun_0.3_2011[phl_0.3$Territory_inf2_mun_0.3_2011 == 1 & phl_0.3$inf2_mun_0.3_2011 == 1] <- "Homogenous"
phl_0.3$Territory_inf2_mun_0.3_2011[phl_0.3$Territory_inf2_mun_0.3_2011 == 0 & phl_0.3$inf2_mun_0.3_2011 == 1] <- "Enclave"
phl_0.3$Territory_inf2_mun_0.3_2011[phl_0.3$Territory_inf2_mun_0.3_2011 == 1 & phl_0.3$inf2_mun_0.3_2011 == 0] <- "Enclave"
prop.table(table(phl$Territory_inf2_mun_2011))

# 2012
phl_0.3$Territory_inf2_mun_0.3_2012 <- lag.listw(x = phl_0.3_nblist, var = phl_0.3$inf2_mun_0.3_2012, zero.policy = TRUE, NAOK = TRUE) 
phl_0.3$Territory_inf2_mun_0.3_2012[phl_0.3$Territory_inf2_mun_0.3_2012 > 0 & phl_0.3$Territory_inf2_mun_0.3_2012 < 1] <- "Mixed"
phl_0.3$Territory_inf2_mun_0.3_2012[phl_0.3$Territory_inf2_mun_0.3_2012 == 0 & phl_0.3$inf2_mun_0.3_2012 == 0] <- "Homogenous"
phl_0.3$Territory_inf2_mun_0.3_2012[phl_0.3$Territory_inf2_mun_0.3_2012 == 1 & phl_0.3$inf2_mun_0.3_2012 == 1] <- "Homogenous"
phl_0.3$Territory_inf2_mun_0.3_2012[phl_0.3$Territory_inf2_mun_0.3_2012 == 0 & phl_0.3$inf2_mun_0.3_2012 == 1] <- "Enclave"
phl_0.3$Territory_inf2_mun_0.3_2012[phl_0.3$Territory_inf2_mun_0.3_2012 == 1 & phl_0.3$inf2_mun_0.3_2012 == 0] <- "Enclave"
prop.table(table(phl_0.3$Territory_inf2_mun_2012))

# 2013
phl_0.3$Territory_inf2_mun_0.3_2013 <- lag.listw(x = phl_0.3_nblist, var = phl_0.3$inf2_mun_0.3_2013, zero.policy = TRUE, NAOK = TRUE) 
phl_0.3$Territory_inf2_mun_0.3_2013[phl_0.3$Territory_inf2_mun_0.3_2013 > 0 & phl_0.3$Territory_inf2_mun_0.3_2013 < 1] <- "Mixed"
phl_0.3$Territory_inf2_mun_0.3_2013[phl_0.3$Territory_inf2_mun_0.3_2013 == 0 & phl_0.3$inf2_mun_0.3_2013 == 0] <- "Homogenous"
phl_0.3$Territory_inf2_mun_0.3_2013[phl_0.3$Territory_inf2_mun_0.3_2013 == 1 & phl_0.3$inf2_mun_0.3_2013 == 1] <- "Homogenous"
phl_0.3$Territory_inf2_mun_0.3_2013[phl_0.3$Territory_inf2_mun_0.3_2013 == 0 & phl_0.3$inf2_mun_0.3_2013 == 1] <- "Enclave"
phl_0.3$Territory_inf2_mun_0.3_2013[phl_0.3$Territory_inf2_mun_0.3_2013 == 1 & phl_0.3$inf2_mun_0.3_2013 == 0] <- "Enclave"
prop.table(table(phl_0.3$Territory_inf2_mun_2013))

# 2014
phl_0.3$Territory_inf2_mun_0.3_2014 <- lag.listw(x = phl_0.3_nblist, var = phl_0.3$inf2_mun_0.3_2014, zero.policy = TRUE, NAOK = TRUE) 
phl_0.3$Territory_inf2_mun_0.3_2014[phl_0.3$Territory_inf2_mun_0.3_2014 > 0 & phl_0.3$Territory_inf2_mun_0.3_2014 < 1] <- "Mixed"
phl_0.3$Territory_inf2_mun_0.3_2014[phl_0.3$Territory_inf2_mun_0.3_2014 == 0 & phl_0.3$inf2_mun_0.3_2014 == 0] <- "Homogenous"
phl_0.3$Territory_inf2_mun_0.3_2014[phl_0.3$Territory_inf2_mun_0.3_2014 == 1 & phl_0.3$inf2_mun_0.3_2014 == 1] <- "Homogenous"
phl_0.3$Territory_inf2_mun_0.3_2014[phl_0.3$Territory_inf2_mun_0.3_2014 == 0 & phl_0.3$inf2_mun_0.3_2014 == 1] <- "Enclave"
phl_0.3$Territory_inf2_mun_0.3_2014[phl_0.3$Territory_inf2_mun_0.3_2014 == 1 & phl_0.3$inf2_mun_0.3_2014 == 0] <- "Enclave"
prop.table(table(phl$Territory_inf2_mun_2014))

## pivot_longer
phl_mun_0.3 <- phl_0.3 %>% dplyr::select(ADM3_PCODE, Territory_inf2_mun_0.3_2011, Territory_inf2_mun_0.3_2012, Territory_inf2_mun_0.3_2013, Territory_inf2_mun_0.3_2014) %>%
  pivot_longer(cols = starts_with("Territory_inf2"), 
               names_to = "year", 
               values_to = "T_inf2") %>%
  mutate(year = str_sub(year,start=-4))


## drop the geometry
phl_mun_0.3 <- phl_mun_0.3 %>% st_drop_geometry() %>% mutate(ID_year = paste(ADM3_PCODE, year, sep = "_")) %>% 
  mutate(year = as.numeric(year),
         ID_year = paste(ADM3_PCODE, year, sep = "_"))

 
phl_mun_0.3$Enclave_inf2_0.3[phl_mun_0.3$T_inf2 == "Enclave"] <- 1
phl_mun_0.3$Enclave_inf2_0.3[phl_mun_0.3$T_inf2 == "Homogenous" | phl_mun_0.3$T_inf2 == "Mixed"] <- 0
phl_mun_0.3$Mixed_inf2_0.3[phl_mun_0.3$T_inf2 == "Mixed"] <- 1
phl_mun_0.3$Mixed_inf2_0.3[phl_mun_0.3$T_inf2 == "Homogenous" | phl_mun_0.3$T_inf2 == "Enclave"] <- 0

########

#### 3.3 Classify Municipal-Level Territorial Settings Based on Rebel Influence (Threshold: 40%) ####
control_ts_0.4 <- control_ts %>% mutate(inf2_mun_0.4 = if_else(Inf2_per >= 0.4, 1, 0))

control_ts_0.4 <- control_ts_0.4 %>% pivot_wider(names_from = "year",
                                                 values_from = c("Influence2", "Inf2_per", "inf2_mun_0.4")) %>%
  dplyr::select(ADM3_PCODE, inf2_mun_0.4_2011, inf2_mun_0.4_2012, inf2_mun_0.4_2013, inf2_mun_0.4_2014)

phl_0.4 <- map_mun %>% left_join(control_ts_0.4) %>%
  dplyr::select(ADM3_PCODE, inf2_mun_0.4_2011, inf2_mun_0.4_2012, inf2_mun_0.4_2013, inf2_mun_0.4_2014) #add the geometry back 


phl_0.4_nb <- spdep::poly2nb(phl_0.4, queen = TRUE)

phl_0.4_nblist <- nb2listw(phl_0.4_nb,style = "W", zero.policy = TRUE) 

# code the territorial setting
# 2011
phl_0.4$Territory_inf2_mun_0.4_2011 <- lag.listw(x = phl_0.4_nblist, var = phl_0.4$inf2_mun_0.4_2011, zero.policy = TRUE, NAOK = TRUE) 
phl_0.4$Territory_inf2_mun_0.4_2011[phl_0.4$Territory_inf2_mun_0.4_2011 > 0 & phl_0.4$Territory_inf2_mun_0.4_2011 < 1] <- "Mixed"
phl_0.4$Territory_inf2_mun_0.4_2011[phl_0.4$Territory_inf2_mun_0.4_2011 == 0 & phl_0.4$inf2_mun_0.4_2011 == 0] <- "Homogenous"
phl_0.4$Territory_inf2_mun_0.4_2011[phl_0.4$Territory_inf2_mun_0.4_2011 == 1 & phl_0.4$inf2_mun_0.4_2011 == 1] <- "Homogenous"
phl_0.4$Territory_inf2_mun_0.4_2011[phl_0.4$Territory_inf2_mun_0.4_2011 == 0 & phl_0.4$inf2_mun_0.4_2011 == 1] <- "Enclave"
phl_0.4$Territory_inf2_mun_0.4_2011[phl_0.4$Territory_inf2_mun_0.4_2011 == 1 & phl_0.4$inf2_mun_0.4_2011 == 0] <- "Enclave"

# 2012
phl_0.4$Territory_inf2_mun_0.4_2012 <- lag.listw(x = phl_0.4_nblist, var = phl_0.4$inf2_mun_0.4_2012, zero.policy = TRUE, NAOK = TRUE) 
phl_0.4$Territory_inf2_mun_0.4_2012[phl_0.4$Territory_inf2_mun_0.4_2012 > 0 & phl_0.4$Territory_inf2_mun_0.4_2012 < 1] <- "Mixed"
phl_0.4$Territory_inf2_mun_0.4_2012[phl_0.4$Territory_inf2_mun_0.4_2012 == 0 & phl_0.4$inf2_mun_0.4_2012 == 0] <- "Homogenous"
phl_0.4$Territory_inf2_mun_0.4_2012[phl_0.4$Territory_inf2_mun_0.4_2012 == 1 & phl_0.4$inf2_mun_0.4_2012 == 1] <- "Homogenous"
phl_0.4$Territory_inf2_mun_0.4_2012[phl_0.4$Territory_inf2_mun_0.4_2012 == 0 & phl_0.4$inf2_mun_0.4_2012 == 1] <- "Enclave"
phl_0.4$Territory_inf2_mun_0.4_2012[phl_0.4$Territory_inf2_mun_0.4_2012 == 1 & phl_0.4$inf2_mun_0.4_2012 == 0] <- "Enclave"

# 2013
phl_0.4$Territory_inf2_mun_0.4_2013 <- lag.listw(x = phl_0.4_nblist, var = phl_0.4$inf2_mun_0.4_2013, zero.policy = TRUE, NAOK = TRUE) 
phl_0.4$Territory_inf2_mun_0.4_2013[phl_0.4$Territory_inf2_mun_0.4_2013 > 0 & phl_0.4$Territory_inf2_mun_0.4_2013 < 1] <- "Mixed"
phl_0.4$Territory_inf2_mun_0.4_2013[phl_0.4$Territory_inf2_mun_0.4_2013 == 0 & phl_0.4$inf2_mun_0.4_2013 == 0] <- "Homogenous"
phl_0.4$Territory_inf2_mun_0.4_2013[phl_0.4$Territory_inf2_mun_0.4_2013 == 1 & phl_0.4$inf2_mun_0.4_2013 == 1] <- "Homogenous"
phl_0.4$Territory_inf2_mun_0.4_2013[phl_0.4$Territory_inf2_mun_0.4_2013 == 0 & phl_0.4$inf2_mun_0.4_2013 == 1] <- "Enclave"
phl_0.4$Territory_inf2_mun_0.4_2013[phl_0.4$Territory_inf2_mun_0.4_2013 == 1 & phl_0.4$inf2_mun_0.4_2013 == 0] <- "Enclave"

# 2014
phl_0.4$Territory_inf2_mun_0.4_2014 <- lag.listw(x = phl_0.4_nblist, var = phl_0.4$inf2_mun_0.4_2014, zero.policy = TRUE, NAOK = TRUE) 
phl_0.4$Territory_inf2_mun_0.4_2014[phl_0.4$Territory_inf2_mun_0.4_2014 > 0 & phl_0.4$Territory_inf2_mun_0.4_2014 < 1] <- "Mixed"
phl_0.4$Territory_inf2_mun_0.4_2014[phl_0.4$Territory_inf2_mun_0.4_2014 == 0 & phl_0.4$inf2_mun_0.4_2014 == 0] <- "Homogenous"
phl_0.4$Territory_inf2_mun_0.4_2014[phl_0.4$Territory_inf2_mun_0.4_2014 == 1 & phl_0.4$inf2_mun_0.4_2014 == 1] <- "Homogenous"
phl_0.4$Territory_inf2_mun_0.4_2014[phl_0.4$Territory_inf2_mun_0.4_2014 == 0 & phl_0.4$inf2_mun_0.4_2014 == 1] <- "Enclave"
phl_0.4$Territory_inf2_mun_0.4_2014[phl_0.4$Territory_inf2_mun_0.4_2014 == 1 & phl_0.4$inf2_mun_0.4_2014 == 0] <- "Enclave"

## pivot_longer
phl_mun_0.4 <- phl_0.4 %>% dplyr::select(ADM3_PCODE, Territory_inf2_mun_0.4_2011, Territory_inf2_mun_0.4_2012, Territory_inf2_mun_0.4_2013, Territory_inf2_mun_0.4_2014) %>%
  pivot_longer(cols = starts_with("Territory_inf2"), 
               names_to = "year", 
               values_to = "T_inf2") %>%
  mutate(year = str_sub(year,start=-4))


## drop the geometry
phl_mun_0.4 <- phl_mun_0.4 %>% st_drop_geometry() %>% mutate(ID_year = paste(ADM3_PCODE, year, sep = "_")) %>% 
  mutate(year = as.numeric(year),
         ID_year = paste(ADM3_PCODE, year, sep = "_"))


phl_mun_0.4$Enclave_inf2_0.4[phl_mun_0.4$T_inf2 == "Enclave"] <- 1
phl_mun_0.4$Enclave_inf2_0.4[phl_mun_0.4$T_inf2 == "Homogenous" | phl_mun_0.4$T_inf2 == "Mixed"] <- 0
phl_mun_0.4$Mixed_inf2_0.4[phl_mun_0.4$T_inf2 == "Mixed"] <- 1
phl_mun_0.4$Mixed_inf2_0.4[phl_mun_0.4$T_inf2 == "Homogenous" | phl_mun_0.4$T_inf2 == "Enclave"] <- 0

########

#### 3.4 Code the outcome ####
outcome_0.2 <- control_ts %>% mutate(inf2_mun_0.2 = if_else(Inf2_per >= 0.2, 1, 0)) %>%
  arrange(ADM3_PCODE, year) %>%
  group_by(ADM3_PCODE) %>%
  mutate(in2_lag_0.2 = dplyr::lag(inf2_mun_0.2),
         Tchange_in2_0.2 = inf2_mun_0.2 - in2_lag_0.2) %>% 
  dplyr::select(ADM3_PCODE, year, Tchange_in2_0.2)

outcome_0.2$Tchange_in2_0.2[outcome_0.2$Tchange_in2_0.2 == -1] <- 1 


outcome_0.3 <- control_ts %>% mutate(inf2_mun_0.3 = if_else(Inf2_per >= 0.3, 1, 0)) %>%
  arrange(ADM3_PCODE, year) %>%
  group_by(ADM3_PCODE) %>%
  mutate(in2_lag_0.3 = dplyr::lag(inf2_mun_0.3),
         Tchange_in2_0.3 = inf2_mun_0.3 - in2_lag_0.3) %>% 
  dplyr::select(ADM3_PCODE, year, Tchange_in2_0.3)

outcome_0.3$Tchange_in2_0.3[outcome_0.3$Tchange_in2_0.3 == -1] <- 1 

outcome_0.4 <- control_ts %>% mutate(inf2_mun_0.4 = if_else(Inf2_per >= 0.4, 1, 0)) %>%
  arrange(ADM3_PCODE, year) %>%
  group_by(ADM3_PCODE) %>%
  mutate(in2_lag_0.4 = dplyr::lag(inf2_mun_0.4),
         Tchange_in2_0.4 = inf2_mun_0.4 - in2_lag_0.4) %>% 
  dplyr::select(ADM3_PCODE, year, Tchange_in2_0.4)

outcome_0.4$Tchange_in2_0.4[outcome_0.4$Tchange_in2_0.4 == -1] <- 1 
########

#### D. Data merge ####
#0.2_nd_year_mun
dataset_mun_0.2_1 <- phl_mun_0.2 %>% right_join(nd_year_mun) %>% left_join(outcome_0.2) %>% 
  filter(year > 2010 & year < 2015) %>% 
  arrange(ADM3_PCODE,year) %>% group_by(ADM3_PCODE) %>% 
  mutate(Enclave_inf2_0.2 = dplyr::lag(Enclave_inf2_0.2, n=1),
         Mixed_inf2_0.2 = dplyr::lag(Mixed_inf2_0.2, n=1))
#0.2_nd_map
dataset_mun_0.2_2 <- phl_mun_0.2 %>% right_join(nd_map) %>% left_join(outcome_0.2) %>% 
  filter(year > 2010 & year < 2015) %>% 
  arrange(ADM3_PCODE,year) %>% group_by(ADM3_PCODE) %>% 
  mutate(Enclave_inf2_0.2 = dplyr::lag(Enclave_inf2_0.2, n=1),
         Mixed_inf2_0.2 = dplyr::lag(Mixed_inf2_0.2, n=1))

#0.3_nd_year_mun
dataset_mun_0.3_1 <- phl_mun_0.3 %>% right_join(nd_year_mun) %>% left_join(outcome_0.3) %>% 
  filter(year > 2010 & year < 2015) %>% 
  arrange(ADM3_PCODE,year) %>% group_by(ADM3_PCODE) %>% 
  mutate(Enclave_inf2_0.3 = dplyr::lag(Enclave_inf2_0.3, n=1),
         Mixed_inf2_0.3 = dplyr::lag(Mixed_inf2_0.3, n=1))
#0.3_nd_map
dataset_mun_0.3_2 <- phl_mun_0.3 %>% right_join(nd_map) %>% left_join(outcome_0.3) %>% 
  filter(year > 2010 & year < 2015) %>% 
  arrange(ADM3_PCODE,year) %>% group_by(ADM3_PCODE) %>% 
  mutate(Enclave_inf2_0.3 = dplyr::lag(Enclave_inf2_0.3, n=1),
         Mixed_inf2_0.3 = dplyr::lag(Mixed_inf2_0.3, n=1))

#0.4_nd_year_mun
dataset_mun_0.4_1 <- phl_mun_0.4 %>% right_join(nd_year_mun) %>% left_join(outcome_0.4) %>% 
  filter(year > 2010 & year < 2015) %>% 
  arrange(ADM3_PCODE,year) %>% group_by(ADM3_PCODE) %>% 
  mutate(Enclave_inf2_0.4 = dplyr::lag(Enclave_inf2_0.4, n=1),
         Mixed_inf2_0.4 = dplyr::lag(Mixed_inf2_0.4, n=1))
#0.4_nd_map
dataset_mun_0.4_2 <- phl_mun_0.4 %>% right_join(nd_map) %>% left_join(outcome_0.4) %>% 
  filter(year > 2010 & year < 2015) %>% 
  arrange(ADM3_PCODE,year) %>% group_by(ADM3_PCODE) %>% 
  mutate(Enclave_inf2_0.4 = dplyr::lag(Enclave_inf2_0.4, n=1),
         Mixed_inf2_0.4 = dplyr::lag(Mixed_inf2_0.4, n=1))

########

write_csv(dataset_mun_0.2_1, "./data/analysis/dataset_mun/dataset_mun_0.2_1.csv", na = "NA")
write_csv(dataset_mun_0.2_2, "./data/analysis/dataset_mun/dataset_mun_0.2_2.csv", na = "NA")
write_csv(dataset_mun_0.3_1, "./data/analysis/dataset_mun/dataset_mun_0.3_1.csv", na = "NA")
write_csv(dataset_mun_0.3_2, "./data/analysis/dataset_mun/dataset_mun_0.3_2.csv", na = "NA")
write_csv(dataset_mun_0.4_1, "./data/analysis/dataset_mun/dataset_mun_0.4_1.csv", na = "NA")
write_csv(dataset_mun_0.4_2, "./data/analysis/dataset_mun/dataset_mun_0.4_2.csv", na = "NA")
